Q1
First I load the data files
Next I reshape dat into a handier long format. I use the tidyr package for this instead of the reshape package because the former is the improved version of the latter.
install.packages('tidyverse')
URL 'https://cran.rstudio.com/bin/macosx/mavericks/contrib/3.3/tidyverse_1.1.1.tgz' を試しています
Content type 'application/x-gzip' length 37228 bytes (36 KB)
==================================================
downloaded 36 KB
The downloaded binary packages are in
/var/folders/1w/nyst5xl16t329h4gpz0j49hm0000gn/T//Rtmp2exVAC/downloaded_packages
library(tidyverse)
Loading tidyverse: ggplot2
Loading tidyverse: tibble
Loading tidyverse: tidyr
Loading tidyverse: readr
Loading tidyverse: purrr
Loading tidyverse: dplyr
Conflicts with tidy packages ----------------------------------------------------------------------------------------------------------------------------------
filter(): dplyr, stats
lag(): dplyr, stats
dat <- dat %>%
tidyr::gather(key=From, value=value, Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,District\ of\ Columbia,Florida,Georgia,Hawaii,Idaho,Illinois,Indiana,Iowa,Kansas,Kentucky,Louisiana,Maine,Maryland,Massachusetts,Michigan,Minnesota,Mississippi,Missouri,Montana,Nebraska,Nevada,New\ Hampshire,New\ Jersey,New\ Mexico,New\ York,North\ Carolina,North\ Dakota,Ohio,Oklahoma,Oregon,Pennsylvania,Rhode\ Island,South\ Carolina,South\ Dakota,Tennessee,Texas,Utah,Vermont,Virginia,Washington,West\ Virginia,Wisconsin,Wyoming)
エラー: 想定外の入力です in:
"dat <- dat %>%
tidyr::gather(key=From, value=value, Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,District\"
Delete the punctuation.
Then I add two new columns to the above data frame.
d1 <- merge(dat, states, by.x="From", by.y="States")
d1 <- d1[,c(-5,-6)]
names(d1)[4] <- "ID_From"
d2 <- merge(d1, states, by.x="To", by.y="States")
d2 <- d2[,c(-6,-7)]
names(d2)[5] <- "ID_To"
dat <- d2
dat
Sort the data frame in the ascending order
dat <- arrange(dat, ID_From)
dat <- arrange(dat, ID_To)
dat
Convert the above data frame into the matrix form
Drawing the chord diagram
chorddiag(as.matrix(t5),groupColors=states$Color,showTicks=F,groupnamePadding = 20,groupThickness=.05,groupnameFontsize=10)
row names of the 'data' matrix differ from its column names or the 'groupNames' argument.
Q2
The individual data has not been included yet. Read “Stops On Lines” and all the GIS data of the bus lines.
library(dplyr)
library(sp)
library(rgdal)
library(leaflet)
library(ggmap)
# Bus Stops
SOL <- readOGR("/Users/susu/Desktop/Hong\ Kong/Semester2/Big_Data/assignment_data/as1/BusStops1216","StopsOnLines1216")
SOL.pj <- spTransform(SOL, CRS("+proj=longlat +datum=WGS84"))
# Bus Routes
CC <- readOGR("/Users/susu/Desktop/Hong\ Kong/Semester2/Big_Data/assignment_data/as1/ComCir1216","ComCir1216")
CC.pj <- spTransform(CC, CRS("+proj=longlat +datum=WGS84"))
LE <- readOGR("/Users/susu/Desktop/Hong\ Kong/Semester2/Big_Data/assignment_data/as1/LimExp1216","LimExp1216")
LE.pj <- spTransform(LE, CRS("+proj=longlat +datum=WGS84"))
LCBD <- readOGR("/Users/susu/Desktop/Hong\ Kong/Semester2/Big_Data/assignment_data/as1/LocalCBD1216","LocalCBD1216")
LCBD.pj <- spTransform(LCBD, CRS("+proj=longlat +datum=WGS84"))
LNCBD <- readOGR("/Users/susu/Desktop/Hong\ Kong/Semester2/Big_Data/assignment_data/as1/LocalNonCBD1216","LocalNonCBD1216")
LNCBD.pj <- spTransform(LNCBD, CRS("+proj=longlat +datum=WGS84"))
RBRT <- readOGR("/Users/susu/Desktop/Hong\ Kong/Semester2/Big_Data/assignment_data/as1/RapidBRT1216","RapidBRT1216")
RBRT.pj <- spTransform(RBRT, CRS("+proj=longlat +datum=WGS84"))
# とりあえずindividual2についてのみreadできることを確認。一旦放置してこれ以外でできるか試す。
I2 <- readOGR("/Users/susu/Desktop/Hong\ Kong/Semester2/Big_Data/assignment_data/as1/Individuals1216","2")
I2.pj <- spTransform(I2, CRS("+proj=longlat +datum=WGS84"))
make Line_list
tmp_CC <- geometry(CC.pj)
tmp_LE <- geometry(LE.pj)
tmp_LCBD <- geometry(LCBD.pj)
tmp_LNCBD <- geometry(LNCBD.pj)
tmp_RBRT <- geometry(RBRT.pj)
tmps <- list(tmp_CC, tmp_LE, tmp_LCBD, tmp_LNCBD, tmp_RBRT)
Line_list <- list()
for (i in 1:5){
for (j in 1:length(tmps[[i]])){
Line_list <- c(Line_list, tmps[[i]][j]@lines[[1]]@Lines)
}
}
make new_id
pjs <- list(CC.pj, LE.pj, LCBD.pj, LNCBD.pj, RBRT.pj)
LinLSs <- list()
for (i in 1:5){
LinLSs <- c(LinLSs, sapply(pjs[[i]]@lines, function(x) length(x@Lines)))
}
LinLSs <- LinLSs %>% unlist()
new_id <- sapply(1:length(LinLSs), function(x) paste0(x, "_", seq.int(LinLSs[[x]]))) %>%
unlist()
SLDF <- mapply(function(x, y) Lines(x, ID = y), x = Line_list, y = new_id) %>%
list() %>%
SpatialLines() %>%
SpatialLinesDataFrame(data = DAT)
SpatialLines(.) でエラー:
lines list not exclusively filled with Lines objects
make new lines and LA map
---
title: "Big Data Analytics Assignment 1"
output: html_notebook
---

## Q1

First I load the data files
```{r}
# Load the state-to-state migration table.
# The path is written with a plain space: "\ " is not a valid escape sequence
# inside an R string literal and prevents the chunk from parsing.
dat <- read.csv("/Users/susu/Desktop/Hong Kong/Semester2/Big_Data/assignment_data/as1/migration2012.csv")
head(dat)
```

```{r}
# Load the per-state lookup table used for the chord diagram.
# The path is written with a plain space: "\ " is not a valid escape sequence
# inside an R string literal and prevents the chunk from parsing.
states <- read.csv("/Users/susu/Desktop/Hong Kong/Semester2/Big_Data/assignment_data/as1/states_chord.csv")
states
```

Next I reshape dat into a handier long format. I use the **tidyr package** for this instead of the **reshape package** because the former is the improved version of the latter.

```{r}
# Install tidyverse only when it is missing, so that re-running the notebook
# does not download and reinstall the package every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
```

```{r}
# Reshape from wide to long: each listed state column is stacked into a pair
# of columns, `From` (the former column name) and `value` (the cell content).
# Columns that are not listed are carried along unchanged.
dat <- tidyr::gather(
  dat,
  key = From, value = value,
  Alabama, Alaska, Arizona, Arkansas, California, Colorado, Connecticut,
  Delaware, District.of.Columbia, Florida, Georgia, Hawaii, Idaho, Illinois,
  Indiana, Iowa, Kansas, Kentucky, Louisiana, Maine, Maryland, Massachusetts,
  Michigan, Minnesota, Mississippi, Missouri, Montana, Nebraska, Nevada,
  New.Hampshire, New.Jersey, New.Mexico, New.York, North.Carolina,
  North.Dakota, Ohio, Oklahoma, Oregon, Pennsylvania, Rhode.Island,
  South.Carolina, South.Dakota, Tennessee, Texas, Utah, Vermont, Virginia,
  Washington, West.Virginia, Wisconsin, Wyoming
)
```

Delete the punctuation.

```{r}
# Replace punctuation with spaces in the non-numeric (label) columns only, so
# that dotted names such as "District.of.Columbia" become
# "District of Columbia".
# Numeric columns are deliberately left untouched: running apply() over the
# whole data frame converts it to a character matrix first, which pads the
# numbers with spaces and would blank out decimal points and minus signs.
is_label <- !vapply(dat, is.numeric, logical(1))
dat[is_label] <- lapply(
  dat[is_label],
  function(y) gsub("[[:punct:]]", " ", y)
)
dat
```

Then I add two new columns to the above data frame.

```{r}
# Look up the origin state: join on the state name, discard the lookup
# columns in positions 5 and 6, and label the column in position 4 "ID_From".
d1 <- merge(x = dat, y = states, by.x = "From", by.y = "States")
d1 <- d1[, -(5:6)]
colnames(d1)[4] <- "ID_From"

# Same lookup for the destination state: discard positions 6 and 7 and label
# the column in position 5 "ID_To".
d2 <- merge(x = d1, y = states, by.x = "To", by.y = "States")
d2 <- d2[, -(6:7)]
colnames(d2)[5] <- "ID_To"

dat <- d2
dat
```

Sort the data frame in ascending order of the state IDs.

```{r}
# Order rows by destination ID, breaking ties by origin ID. A single call with
# both keys states the intended ordering explicitly instead of relying on two
# consecutive sorts preserving the earlier order among ties.
dat <- arrange(dat, ID_To, ID_From)
dat
```

Convert the above data frame into the matrix form

```{r}
# Build the origin x destination table (t5) that is passed to the chord diagram.
# NOTE(review): the positional indexing below assumes dat's columns are, in
# order, To, From, value, ID_From, ID_To — confirm against the merge step.

# t1: columns 1, 2, 4, 5 of dat, spread so that every distinct `To` becomes a
# column holding ID_To. Only its first row is reused below.
t1 <- dat[,c(1,2,4,5)]
t1 <- t1 %>%
  spread(key=To, value=ID_To)

# t2: columns 1-4 of dat, spread so that every distinct `To` becomes a column
# holding `value`.
t2 <- dat[,c(1,2,3,4)]
t2 <- t2 %>%
  spread(key=To, value=value)

# Stack the first row of t1 on top of t2 and mark it: column 2 is set to 0 and
# column 1 to "ID_To" (From is converted to character first so the label can
# be assigned).
# NOTE(review): presumably the 0 is meant to make this row sort first in
# arrange(ID_From) — this assumes every real ID_From is greater than 0.
t3 <- rbind(t1[1,],t2)
t3$From <- as.character(t3$From)
t3[1,2] <- 0
t3[1,1] <- "ID_To"
t3 <- t3 %>%
  arrange(ID_From)

# setcolorder() reorders the columns of t3 in place to exactly the order
# listed here: From, ID_From, then the state columns.
library(data.table)
setcolorder(t3,c("From","ID_From","Connecticut","Maine","Massachusetts","New Hampshire","Rhode Island",
"Vermont","New Jersey","New York","Pennsylvania","Illinois","Indiana",
"Michigan","Ohio","Wisconsin","Iowa","Kansas","Minnesota",
"Missouri","Nebraska","North Dakota","South Dakota","Delaware","Florida",
"Georgia","Maryland","North Carolina","South Carolina","Virginia","District of Columbia",
"West Virginia","Alabama","Kentucky","Mississippi","Tennessee","Arkansas",
"Louisiana","Oklahoma","Texas","Arizona","Colorado","Idaho",
"Montana","Nevada","New Mexico","Utah","Wyoming","Alaska",
"California","Hawaii","Oregon","Washington"))

# Drop the first row and the second column (ID_From after the reordering).
t3 <- t3[c(-1),c(-2)]

# t4: t3 without its first column, whose values become the row names.
t4 <- t3[,-1]
rownames(t4) <- t3[,1]

# t5: every column of t4 converted with as.numeric(), same row names as t4.
# NOTE(review): data.frame() with its default check.names rewrites column
# names that contain spaces (e.g. "New Hampshire"), so t5's column names may
# no longer be identical to its row names.
t5 <- data.frame(apply(t4, 2, function(y) as.numeric(y)))
rownames(t5) <- t3[,1]
t5
```

Drawing the chord diagram
```{r}
library(chorddiag)

# t5 was built with data.frame(), which turns the spaces in multi-word column
# names into dots ("New Hampshire" -> "New.Hampshire") while the row names keep
# their spaces. Restore the spaces so that row and column labels can match.
flow_matrix <- as.matrix(t5)
colnames(flow_matrix) <- gsub(".", " ", colnames(flow_matrix), fixed = TRUE)

# NOTE(review): groupColors is taken in the row order of `states`; confirm it
# matches the row/column order of the matrix.
chorddiag(
  flow_matrix,
  groupColors = states$Color,
  showTicks = FALSE,
  groupnamePadding = 20,
  groupThickness = .05,
  groupnameFontsize = 10
)
```


## Q2

The individual data has not been included yet.
Read "Stops On Lines" and all the GIS data of the bus lines.
```{r}
library(dplyr)
library(sp)
library(rgdal)
library(leaflet)
library(ggmap)

# All shapefile folders live under one directory. The path is written with a
# plain space: "\ " is not a valid escape sequence inside an R string literal
# and prevents the chunk from parsing.
data_dir <- "/Users/susu/Desktop/Hong Kong/Semester2/Big_Data/assignment_data/as1"

# Every layer is reprojected to longitude/latitude on the WGS84 datum.
wgs84 <- CRS("+proj=longlat +datum=WGS84")

# Bus Stops
SOL <- readOGR(file.path(data_dir, "BusStops1216"), "StopsOnLines1216")
SOL.pj <- spTransform(SOL, wgs84)

# Bus Routes
CC <- readOGR(file.path(data_dir, "ComCir1216"), "ComCir1216")
CC.pj <- spTransform(CC, wgs84)
LE <- readOGR(file.path(data_dir, "LimExp1216"), "LimExp1216")
LE.pj <- spTransform(LE, wgs84)
LCBD <- readOGR(file.path(data_dir, "LocalCBD1216"), "LocalCBD1216")
LCBD.pj <- spTransform(LCBD, wgs84)
LNCBD <- readOGR(file.path(data_dir, "LocalNonCBD1216"), "LocalNonCBD1216")
LNCBD.pj <- spTransform(LNCBD, wgs84)
RBRT <- readOGR(file.path(data_dir, "RapidBRT1216"), "RapidBRT1216")
RBRT.pj <- spTransform(RBRT, wgs84)

# For now, only the individual layer "2" has been confirmed to be readable.
# It is set aside for the moment while trying to proceed with the other layers.
I2 <- readOGR(file.path(data_dir, "Individuals1216"), "2")
I2.pj <- spTransform(I2, wgs84)
```

make Line_list
```{r}
# Geometry of each reprojected bus-route layer.
tmp_CC <- geometry(CC.pj)
tmp_LE <- geometry(LE.pj)
tmp_LCBD <- geometry(LCBD.pj)
tmp_LNCBD <- geometry(LNCBD.pj)
tmp_RBRT <- geometry(RBRT.pj)
tmps <- list(tmp_CC, tmp_LE, tmp_LCBD, tmp_LNCBD, tmp_RBRT)

# Concatenate the Line objects of every feature of one layer, in feature order.
# The leading empty list guarantees a list result even for a layer without
# features.
lines_of_layer <- function(layer) {
  per_feature <- lapply(layer@lines, function(feature) feature@Lines)
  do.call(c, c(list(list()), per_feature))
}

# Flat list of all Line objects: layer by layer, feature by feature. Built in
# one pass instead of growing the list inside a nested index loop, and without
# assuming the number of layers or that every layer is non-empty.
Line_list <- do.call(c, c(list(list()), lapply(tmps, lines_of_layer)))
```


make new_id
```{r}
pjs <- list(CC.pj, LE.pj, LCBD.pj, LNCBD.pj, RBRT.pj)

# Number of Line segments in each feature, over all layers in the order of
# pjs. vapply() keeps the result an integer vector regardless of layer size.
LinLSs <- unlist(lapply(pjs, function(layer) {
  vapply(layer@lines, function(x) length(x@Lines), integer(1))
}))

# One ID per segment: "<feature index>_<segment index within the feature>".
# seq_along()/seq_len() yield nothing for empty input, unlike 1:length(x) and
# seq.int(0), which would count down and produce bogus IDs.
new_id <- unlist(lapply(seq_along(LinLSs), function(x) {
  sprintf("%d_%d", x, seq_len(LinLSs[[x]]))
}))
```

```{r}
## make a new data.frame (only route_id)
# new_id holds one ID per Line segment, while each layer's attribute table has
# one VAR_IDENT per feature. Repeat every feature's route id once per segment
# (LinLSs = segments per feature) so that DAT has exactly one row per ID;
# otherwise assigning the row names fails as soon as a feature has more than
# one segment.
route_ids <- unlist(lapply(pjs, function(layer) {
  as.character(layer@data$VAR_IDENT)
}))
DAT <- data.frame(
  route_id = rep(route_ids, times = LinLSs),
  row.names = new_id
)

# Wrap every segment in its own Lines object whose ID equals the matching row
# name of DAT. SIMPLIFY = FALSE keeps the result a plain list of Lines objects.
segment_lines <- mapply(
  function(x, y) Lines(list(x), ID = y),
  Line_list, new_id,
  SIMPLIFY = FALSE, USE.NAMES = FALSE
)

# Carry over the coordinate reference system of the source layers instead of
# leaving it undefined.
SLDF <- SpatialLinesDataFrame(
  SpatialLines(segment_lines, proj4string = pjs[[1]]@proj4string),
  data = DAT
)
```


make new lines and LA map
```{r}
# Geocode the map centre. The query is spelled "Los Angeles", and the result
# is stored under its own name so the Q1 data frame `dat` is not overwritten.
la_center <- geocode("Los Angeles")

# Bus routes as black lines and bus stops as red circles on top of the default
# tiles. [[ ]] extracts plain numbers for setView(); single brackets would
# pass one-column data frames.
leaflet() %>%
  setView(lng = la_center[["lon"]], lat = la_center[["lat"]], zoom = 11) %>%
  addPolylines(data = SLDF, color = "black", opacity = 1, weight = 1) %>%
  addCircles(
    data = SOL.pj@data, lng = ~LONG, lat = ~LAT,
    color = "red", weight = 0.3
  ) %>%
  addTiles()
```


## Q3

The code is as follows
```{r}
library(quantmod)
library(highcharter)

# Download two exchange-rate series from the "oanda" source. With
# auto.assign = FALSE each series is returned and stored explicitly rather
# than being assigned into the environment by getSymbols().
x <- getSymbols(
  Symbols = "AUD/JPY",
  src = "oanda",
  auto.assign = FALSE
)
y <- getSymbols(
  Symbols = "GBP/USD",
  src = "oanda",
  auto.assign = FALSE
)


```
























